# Import Data Cube Utilities
import datacube
import sys, os
os.environ['USE_PYGEOS'] = '0'
from dea_tools.plotting import rgb, display_map
from dea_tools.bandindices import calculate_indices
### EASI tools
sys.path.append(os.path.expanduser('../scripts'))
from ceos_utils.data_cube_utilities.clean_mask import landsat_clean_mask_invalid, landsat_qa_clean_mask
from easi_tools import EasiDefaults
from easi_tools import notebook_utils
easi = EasiDefaults() # Get the default parameters for this system
# Import Common Utilities
from functools import partial
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import pandas as pd
Successfully found configuration for deployment "eail"
cluster, client = notebook_utils.initialize_dask(use_gateway=False)
display(cluster if cluster else client)
print(notebook_utils.localcluster_dashboard(client, server=easi.hub))
5911d938
| Dashboard: http://127.0.0.1:8787/status | Workers: 4 |
| Total threads: 8 | Total memory: 29.00 GiB |
| Status: running | Using processes: True |
Scheduler-1f0979a2-b391-4c98-8709-c254ea225466
| Comm: tcp://127.0.0.1:42963 | Workers: 4 |
| Dashboard: http://127.0.0.1:8787/status | Total threads: 8 |
| Started: Just now | Total memory: 29.00 GiB |
| Comm: tcp://127.0.0.1:38511 | Total threads: 2 |
| Dashboard: http://127.0.0.1:35211/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:43783 | |
| Local directory: /tmp/dask-worker-space/worker-34oce967 | |
| Comm: tcp://127.0.0.1:36783 | Total threads: 2 |
| Dashboard: http://127.0.0.1:40169/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:43167 | |
| Local directory: /tmp/dask-worker-space/worker-w4xminv9 | |
| Comm: tcp://127.0.0.1:36193 | Total threads: 2 |
| Dashboard: http://127.0.0.1:38873/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:38327 | |
| Local directory: /tmp/dask-worker-space/worker-bi5xr7vb | |
| Comm: tcp://127.0.0.1:33987 | Total threads: 2 |
| Dashboard: http://127.0.0.1:41977/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:38279 | |
| Local directory: /tmp/dask-worker-space/worker-j44blu3e | |
https://hub.eail.easi-eo.solutions/user/jhodge/proxy/8787/status
from datacube.utils.aws import configure_s3_access
configure_s3_access(aws_unsigned=False, requester_pays=True, client=client)
<botocore.credentials.Credentials at 0x7f2979c309d0>
# Define the Product
product = "landsat8_c2l2_sr"
# Select a Latitude-Longitude point for the center of the analysis region
# Select the size of the box (in degrees) surrounding the center point
# Mombasa, Kenya
# lat_long = (-4.03, 39.62)
# box_size_deg = 0.15
# Calculates the latitude and longitude bounds of the analysis box
# latitude = (lat_long[0]-box_size_deg/2, lat_long[0]+box_size_deg/2)
# longitude = (lat_long[1]-box_size_deg/2, lat_long[1]+box_size_deg/2)
# Sydney Cricket Ground
# latitude = (-33.8951, -33.8902)
# longitude = (151.2219, 151.2276)
# Suva, Fiji
# latitude = (-18.1725, -18.0492)
# longitude = (178.3881, 178.5190)
# Bua Bay, Fiji
# latitude = (-17.0069, -16.7447)
# longitude = (178.4270, 178.6750)
latitude = easi.latitude
longitude = easi.longitude
# Select a time range
# The inputs require a format (Min,Max) using this date format (YYYY-MM-DD)
# The Landsat-8 allowable time range is: 2013-04-07 to current
time_extents = ('2021-01-01', '2021-06-01')
# The code below renders a map that can be used to view the region.
# It is possible to find new regions using the map below.
# Use your mouse to zoom in/out to explore new regions
# Click on the map to view Lat-Lon coordinates of any location that could define the region boundary
display_map(longitude, latitude)
/env/lib/python3.10/site-packages/dea_tools/plotting.py:313: FutureWarning: This function is deprecated. See: https://pyproj4.github.io/pyproj/stable/gotchas.html#upgrading-to-pyproj-2-from-pyproj-1
all_longitude, all_latitude = transform(Proj(crs), Proj('EPSG:4326'), all_x,
# Create a custom cloud coverage table here
def build_cloud_coverage_table_landsat(product,platform,collection,level,
latitude,longitude,time=None,dc=None,
extra_band='green',extra_load_params={}):
dc = dc if dc is not None else datacube.Datacube(app="")
load_params = dict(product=product,
latitude=latitude,
longitude=longitude,
measurements=[extra_band, 'pixel_qa'],
**extra_load_params)
if time is not None:
load_params["time"] = time
landsat_dataset = dc.load(**load_params).persist()
dates = [dt.astype('datetime64[D]') for dt in landsat_dataset.time.values]
clean_masks = landsat_qa_clean_mask(landsat_dataset, platform=platform,
collection=collection, level=level) & \
landsat_clean_mask_invalid(landsat_dataset, platform, collection, level)
clean_percent = [clean_mask.mean()*100 for clean_mask in clean_masks.values]
clean_count = list(map(np.sum, clean_masks.values))
nodata_masks = xr.full_like(clean_masks, False)
band_nodata_values = dc.list_measurements().loc[product, 'nodata']
if band_nodata_values is not None:
for data_var in landsat_dataset.values():
band_nodata_masks = data_var == data_var.attrs['nodata']
nodata_masks = nodata_masks | band_nodata_masks
nodata_percent = [nodata_mask.mean()*100 for nodata_mask in nodata_masks.values]
cloud_masks = (~clean_masks & ~nodata_masks)
cloud_percent = [cloud_mask.mean()*100 for cloud_mask in cloud_masks.values]
total_count = list(map(np.sum, ~nodata_masks.values))
data = {"Date": dates,"Clean_percent": clean_percent,"Cloud_percent": cloud_percent,
"NoData_percent": nodata_percent,"Clean_count": clean_count,"Total_count": total_count}
return (landsat_dataset,
pd.DataFrame(data=data, columns=list(data.keys())),
cloud_masks, nodata_masks, clean_masks)
dc = datacube.Datacube()
# Load the metadata for the given region and time period
landsat_dataset, coverage_table = build_cloud_coverage_table_landsat(dc=dc,
product=product,
platform='LANDSAT_8',
collection='c2',
level='l2',
latitude=latitude,
longitude=longitude,
time=time_extents,
extra_band='green',
extra_load_params={
'output_crs':'EPSG:6933',
'resolution': (-30,30),
'skip_broken_datasets': True,
'dask_chunks': {'time':1},
'group_by': 'solar_day'
}
)[0:2]
This table displays data for each time slice in the cube (starting at an index=0). The "clean percent" is the percent of pixels WITHOUT clouds. So, low numbers are cloudy scenes and high numbers are clearer scenes. The "Clean_count" is the number of clear pixels in the scene and the "Total_count" is the total number of pixels in the scene.
Typically, there is a separation of 16 days between Landsat-8 views for a single location. When there is significant cloud cover, scenes may be missing from time series due to issues with processing and geolocation.
pd.set_option('display.max_rows', len(coverage_table))
coverage_table
| Date | Clean_percent | Cloud_percent | NoData_percent | Clean_count | Total_count | |
|---|---|---|---|---|---|---|
| 0 | 2021-01-05 | 1.854273 | 97.636301 | 0.509426 | 2042 | 109563 |
| 1 | 2021-01-14 | 0.000000 | 100.000000 | 0.000000 | 0 | 110124 |
| 2 | 2021-01-21 | 0.000000 | 100.000000 | 0.000000 | 0 | 110124 |
| 3 | 2021-01-30 | 99.522357 | 0.477643 | 0.000000 | 109598 | 110124 |
| 4 | 2021-02-06 | 99.436998 | 0.563002 | 0.000000 | 109504 | 110124 |
| 5 | 2021-03-03 | 99.992735 | 0.007265 | 0.000000 | 110116 | 110124 |
| 6 | 2021-03-10 | 99.591370 | 0.408630 | 0.000000 | 109674 | 110124 |
| 7 | 2021-03-26 | 63.416694 | 36.583306 | 0.000000 | 69837 | 110124 |
| 8 | 2021-04-04 | 99.929171 | 0.070829 | 0.000000 | 110046 | 110124 |
| 9 | 2021-04-11 | 80.254077 | 19.745923 | 0.000000 | 88379 | 110124 |
| 10 | 2021-04-20 | 9.891577 | 90.108423 | 0.000000 | 10893 | 110124 |
| 11 | 2021-04-27 | 99.957321 | 0.042679 | 0.000000 | 110077 | 110124 |
| 12 | 2021-05-06 | 99.999092 | 0.000908 | 0.000000 | 110123 | 110124 |
| 13 | 2021-05-13 | 99.854709 | 0.145291 | 0.000000 | 109964 | 110124 |
| 14 | 2021-05-22 | 0.000000 | 100.000000 | 0.000000 | 0 | 110124 |
The y-axis is the "clean percentage" from the table above.
plt.figure(figsize = (15,5))
plt.plot(coverage_table["Date"].values, coverage_table["Clean_percent"].values, 'bo', markersize=8)
plt.ylim([0, 105])
plt.show()
# Load the data to create an RGB image
landsat_dataset = dc.load(like=landsat_dataset,
product=product,measurements=['red', 'green', 'blue', 'nir', 'swir1', 'swir2'],
dask_chunks={'time':1})
# Select one of the time slices and create an output image.
# Time slices are numbered from 0 to x and shown in the summary table above
# Review the clean_percentage values to select scenes with few clouds
# Clouds will be visible in WHITE for an output image
slice = 4
# Collection 2 needs scale and offset applied for better color rendering
scale = lambda da: (da*0.0000275)-0.2
# Define the RGB image parameters
# True-Color = red, green, blue (this is the common true-color RGB image)
# False Color = swir2, nir, green (this is commonly used for Landsat data viewing)
true_rgb = landsat_dataset.isel(time=slice)[['red', 'green', 'blue']].map(scale).to_array()
false_rgb = landsat_dataset.isel(time=slice)[['swir2', 'nir', 'green']].map(scale).to_array()
# Define the plot settings and show the plots
# Users may want to alter the figure sizes or plot titles
# The "vmax" value controls the brightness of the images and can be adjusted
fig, ax = plt.subplots(1, 2, figsize=(16, 8))
true_rgb.plot.imshow(ax=ax[0], vmin=0, vmax=0.2)
false_rgb.plot.imshow(ax=ax[1], vmin=0, vmax=0.5)
ax[0].set_title('True Color'), ax[0].xaxis.set_visible(False), ax[0].yaxis.set_visible(False)
ax[1].set_title('False Color'), ax[1].xaxis.set_visible(False), ax[1].yaxis.set_visible(False)
plt.show()